#1. Load Required Packages
required_packages <- c("readxl", "broom","forestmodel","ggplot2","gridExtra")
for (package in required_packages) {
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  library(package, character.only = TRUE)
}
#Description: This section ensures the required packages (readxl, broom, forestmodel, ggplot2, gridExtra) are installed and loaded.


#2. Import Data Frame
# Read the cohort sheet unless a list-like object (e.g. a data frame) named
# `df` is already available. A bare exists("df") is not a usable test: the
# stats package, attached by default, exports a function called `df`
# (F-distribution density), so the check would always be TRUE and the file
# would never be read. Restricting the lookup to mode "list" skips functions.
if (!exists("df", mode = "list")) {
  file_path <- "~/COHORT_Prediction_of_CIP.xlsx"
  # Cells containing the Excel error text "#N/A" are imported as NA.
  df <- read_excel(file_path, sheet = "cohort", na = "#N/A")
}
#Description: This section imports data from an Excel file if the data frame df does not already exist.


#3. Filter Data
# Columns needed for the analysis: per-region SUV statistics (right/left,
# upper/lower), outcome, covariates, and the fields used for filtering.
analysis_columns <- c(
  "r_upper_suv_mean", "r_upper_suv_min", "r_upper_suv_max", "r_upper_suv_sd",
  "r_lower_suv_mean", "r_lower_suv_min", "r_lower_suv_max", "r_lower_suv_sd",
  "l_upper_suv_mean", "l_upper_suv_min", "l_upper_suv_max", "l_upper_suv_sd",
  "l_lower_suv_mean", "l_lower_suv_min", "l_lower_suv_max", "l_lower_suv_sd",
  "pneumonitis", "age", "sex_male", "bmi", "copd", "radio_after_pet", "side_r0_l1",
  "pet_device", "pet_distance_ici", "size", "weight", "pet_impossible_spheres_placement"
)
df_filtered <- df[, analysis_columns]

# Coerce to numeric BEFORE filtering so the comparisons below are always
# numeric; if a column were imported as text, `<=` / `>=` / `==` would
# otherwise compare strings.
df_filtered[] <- lapply(df_filtered, as.numeric)

# Keep records with a PET-to-ICI distance between -1 and 365.
# subset() drops rows where the condition is NA, so records with a missing
# distance are excluded (this was already the net effect of the previous
# two-step filter, whose `| is.na(...)` clause was undone by the second step).
df_filtered_distance <- subset(
  df_filtered,
  pet_distance_ici >= -1 & pet_distance_ici <= 365
)

# Keep only records where pet_impossible_spheres_placement is 0.
df_filtered <- subset(df_filtered_distance, pet_impossible_spheres_placement == 0)
#Description: This section selects the analysis columns and keeps only records where the distance between PET/CT and immunotherapy is between -1 and 365 days and pet_impossible_spheres_placement is 0; all columns are converted to numeric.


#4. Add Columns for Calculations
# Names of the derived metrics that later sections fill in.
cols <- c(
  "no_tumor_suv_mean",
  "no_tumor_suv_min",
  "no_tumor_suv_max",
  "no_tumor_suv_sd",
  "SUV95",
  "LBM",
  "SULmean",
  "SULmax"
)
# Create every derived column up front, initialised to NA.
df_filtered[cols] <- NA
#Description: This section adds new columns to the filtered data frame for various calculated metrics.


#5. Define Functions for Calculations
#Define the function to calculate Lean Body Mass (LBM) from BMI, weight and sex
#
# Arguments:
#   bmi    - body mass index (numeric, scalar or vector)
#   size   - body height; kept for backward compatibility with existing
#            callers but not used, because the formula depends on BMI and
#            weight only
#   weight - body weight (numeric, scalar or vector)
#   sex    - 1 = male, 0 = female
# Returns:
#   Numeric LBM, same length as the inputs. NA is returned where `sex` is
#   missing or is neither 0 nor 1 (previously this raised an error).
calculate_LBM <- function(bmi, size, weight, sex) {
  # Sex-specific denominator constants; NA for missing/unrecognised sex codes
  # so the result propagates NA instead of aborting the caller.
  intercept <- ifelse(sex == 1, 6.68 * 10^3,
                      ifelse(sex == 0, 8.78 * 10^3, NA_real_))
  slope <- ifelse(sex == 1, 216,
                  ifelse(sex == 0, 244, NA_real_))
  (9.27 * 10^3 * weight) / (intercept + (slope * bmi))
}

#Define the function to calculate SULmean and SULmax
#
# Arguments:
#   suv    - a single SUV value
#   lbm    - lean body mass
#   weight - value the SUV is divided by before scaling with `lbm`
# Returns:
#   (suv / weight) * lbm, or NA when `suv` is missing.
calculate_SUL <- function(suv, lbm, weight) {
  # Guard clause: nothing to scale when the SUV itself is missing.
  if (is.na(suv)) {
    return(NA)
  }
  per_weight <- suv / weight
  per_weight * lbm
}


#6. Calculate SUV and Related Metrics
# Fill missing regional SUV statistics from the contralateral side and derive
# the no_tumor_* metrics. Written column-wise so it also works for a data
# frame with zero rows (the previous 1:nrow() loop did not).
suv_stats <- c("mean", "min", "max", "sd")

# Step 1: for each region (upper/lower), when one side's SUV mean is missing
# and the other side's is present, copy all four statistics across.
for (region in c("upper", "lower")) {
  left_mean <- df_filtered[[paste0("l_", region, "_suv_mean")]]
  right_mean <- df_filtered[[paste0("r_", region, "_suv_mean")]]

  # The two masks are mutually exclusive and are fixed before any column is
  # modified, so the copy direction is decided on the original values.
  fill_left <- is.na(left_mean) & !is.na(right_mean)
  fill_right <- is.na(right_mean) & !is.na(left_mean)

  for (stat in suv_stats) {
    left_col <- paste0("l_", region, "_suv_", stat)
    right_col <- paste0("r_", region, "_suv_", stat)
    df_filtered[[left_col]][fill_left] <- df_filtered[[right_col]][fill_left]
    df_filtered[[right_col]][fill_right] <- df_filtered[[left_col]][fill_right]
  }
}

# Step 2: no_tumor_* = average of the lower and upper region on the side
# selected by side_r0_l1 (1 -> left columns, 0 -> right columns), computed
# only where that side's lower SUV mean is available.
side <- df_filtered$side_r0_l1
use_left <- !is.na(side) & side == 1 & !is.na(df_filtered$l_lower_suv_mean)
use_right <- !is.na(side) & side == 0 & !is.na(df_filtered$r_lower_suv_mean)

for (stat in suv_stats) {
  # The values must be combined into one object before averaging:
  # mean(a, b, na.rm = TRUE) treats `b` as the `trim` argument and therefore
  # returns `a` unchanged (or errors when `b` is NA) instead of the average.
  left_avg <- rowMeans(
    cbind(df_filtered[[paste0("l_lower_suv_", stat)]],
          df_filtered[[paste0("l_upper_suv_", stat)]]),
    na.rm = TRUE
  )
  right_avg <- rowMeans(
    cbind(df_filtered[[paste0("r_lower_suv_", stat)]],
          df_filtered[[paste0("r_upper_suv_", stat)]]),
    na.rm = TRUE
  )

  target <- paste0("no_tumor_suv_", stat)
  df_filtered[[target]][use_left] <- left_avg[use_left]
  df_filtered[[target]][use_right] <- right_avg[use_right]
}

# Collapse the regional SUV statistics in two stages, ignoring NAs:
#   1. average right and left side within each region (upper_*, lower_*)
#   2. average the upper and lower region into whole-lung values (suv_lung_*)
lung_stats <- c("mean", "max", "min", "sd")

for (region in c("upper", "lower")) {
  for (stat in lung_stats) {
    side_cols <- paste0(c("r_", "l_"), region, "_suv_", stat)
    df_filtered[[paste0(region, "_suv_", stat)]] <-
      rowMeans(df_filtered[, side_cols], na.rm = TRUE)
  }
}

for (stat in lung_stats) {
  region_cols <- paste0(c("upper", "lower"), "_suv_", stat)
  df_filtered[[paste0("suv_lung_", stat)]] <-
    rowMeans(df_filtered[, region_cols], na.rm = TRUE)
}

# SUV95: whole-lung mean plus qnorm(0.95) times the whole-lung SD.
df_filtered$SUV95 <- df_filtered$suv_lung_mean +
  (qnorm(0.95) * df_filtered$suv_lung_sd)

# LBM and SUL are computed row by row because the helper functions take one
# patient at a time. seq_len() keeps the loop from running on an empty frame.
for (i in seq_len(nrow(df_filtered))) {
  df_filtered$LBM[i] <- calculate_LBM(df_filtered$bmi[i], df_filtered$size[i],
                                      df_filtered$weight[i], df_filtered$sex_male[i])
  # The third argument of calculate_SUL() is the body weight; the `size`
  # column was previously passed here by mistake.
  df_filtered$SULmean[i] <- calculate_SUL(df_filtered$suv_lung_mean[i],
                                          df_filtered$LBM[i], df_filtered$weight[i])
  df_filtered$SULmax[i] <- calculate_SUL(df_filtered$suv_lung_max[i],
                                         df_filtered$LBM[i], df_filtered$weight[i])
}
#Description: This section fills in missing SUV values and calculates various metrics based on available data


#7. Rename Variables
# Lookup table: current column name -> label used in the models and plots.
display_names <- c(
  suv_lung_max = "SUVmax",
  suv_lung_mean = "SUVmean",
  age = "Age",
  sex_male = "Male Sex",
  bmi = "BMI",
  copd = "COPD",
  radio_after_pet = "Radiation¹"
)

# Apply the renames one at a time; a name that is not present is skipped.
for (raw_name in names(display_names)) {
  colnames(df_filtered)[colnames(df_filtered) == raw_name] <- display_names[[raw_name]]
}

#Description: Update the variable names for better readability and interpretation.


#8. Create Subgroup
# Keep only rows with pet_device equal to 6; rows with a missing device are dropped.
on_device_6 <- !is.na(df_filtered$pet_device) & df_filtered$pet_device == 6
df_filtered_subgroup <- df_filtered[on_device_6, ]
#Description: Creates a subgroup containing only patients scanned using the same PET scanner (pet_device == 6).


#9. Perform logistic regression and create forest plots
# Each model regresses pneumonitis on one SUV/SUL metric plus the same
# covariates (sex, age, BMI, COPD, radiation). The SUVmax and SULmax models
# are fitted on df_filtered_subgroup; the others on df_filtered.
# Warnings raised while building a forest plot are suppressed.

# X-axis text styling shared by all forest plots (rotated tick labels).
rotated_x_labels <- theme(
  axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5)
)

# SUVmax
formula <- as.formula("pneumonitis ~ SUVmax + `Male Sex` + Age + BMI + COPD + `Radiation¹`")
modelMAX <- glm(formula, data = df_filtered_subgroup, family = binomial)
forest_plot_max <- suppressWarnings(forest_model(model = modelMAX))
forest_plot_max <- forest_plot_max + rotated_x_labels

# SUVmean
formula <- as.formula("pneumonitis ~ SUVmean + `Male Sex` + Age + BMI + COPD + `Radiation¹`")
modelMEAN <- glm(formula, data = df_filtered, family = binomial)
forest_plot_mean <- suppressWarnings(forest_model(model = modelMEAN))
forest_plot_mean <- forest_plot_mean + rotated_x_labels

# SUV95
formula <- as.formula("pneumonitis ~ SUV95 + `Male Sex` + Age + BMI + COPD + `Radiation¹`")
model95 <- glm(formula, data = df_filtered, family = binomial)
forest_plot_95 <- suppressWarnings(forest_model(model = model95))
forest_plot_95 <- forest_plot_95 + rotated_x_labels

# SULmean
formula <- as.formula("pneumonitis ~ SULmean + `Male Sex` + Age + BMI + COPD + `Radiation¹`")
modelSULMEAN <- glm(formula, data = df_filtered, family = binomial)
forest_plot_SULmean <- suppressWarnings(forest_model(model = modelSULMEAN))
forest_plot_SULmean <- forest_plot_SULmean + rotated_x_labels

# SULmax
formula <- as.formula("pneumonitis ~ SULmax + `Male Sex` + Age + BMI + COPD + `Radiation¹`")
modelSULMAX <- glm(formula, data = df_filtered_subgroup, family = binomial)
forest_plot_SULmax <- suppressWarnings(forest_model(model = modelSULMAX))
forest_plot_SULmax <- forest_plot_SULmax + rotated_x_labels


#Combine Forest Plots into a Grid
# Lay the five forest plots out in a two-column grid, in this order.
forest_plots <- list(
  forest_plot_mean,
  forest_plot_max,
  forest_plot_95,
  forest_plot_SULmean,
  forest_plot_SULmax
)
combined_plots <- grid.arrange(grobs = forest_plots, ncol = 2)
#Description: Creates and fits logistic regression models with different predictors (SUVmax, SUVmean, SUV95, SULmax, SULmean). Generates forest plots to visualize the results and arranges them into a grid for saving.


#10. Save Grid
# Write the combined grid to disk. The status message is built from the same
# file name, so it always reports the file that was actually written (it
# previously announced "Figure 1.png" although a .tiff file is saved).
figure_file <- "Figure 1.tiff"
ggsave(figure_file, combined_plots, width = 16, height = 12)
print(paste("saved:", figure_file))
#Description: Saves the grid to a TIFF file.


#11. Extract and Report Statistical Summary for 'Age' and 'Radiation¹' Coefficients
# Pull one cell of the coefficient table (row `term`, column `column`) from
# each fitted model and return the values as a numeric vector.
coefficient_values <- function(models, term, column) {
  vapply(
    models,
    function(fit) summary(fit)$coefficients[term, column],
    numeric(1)
  )
}

# Age is summarised across all five models.
age_models <- list(modelMEAN, modelMAX, modelSULMEAN, modelSULMAX, model95)
# Radiation is summarised across the SUVmean, SULmean and SUV95 models only.
radiation_models <- list(modelMEAN, modelSULMEAN, model95)

age_p_values <- coefficient_values(age_models, "Age", "Pr(>|z|)")
age_estimates <- coefficient_values(age_models, "Age", "Estimate")
radiation_p_values <- coefficient_values(radiation_models, "`Radiation¹`", "Pr(>|z|)")
radiation_estimates <- coefficient_values(radiation_models, "`Radiation¹`", "Estimate")

# Minimum and maximum p-values for Age
min_Age_p_value <- min(age_p_values)
max_Age_p_value <- max(age_p_values)

# Minimum and maximum coefficients for Age
min_Age_coefficient <- min(age_estimates)
max_Age_coefficient <- max(age_estimates)

# Minimum and maximum p-values for Radiation
min_Radiation_p_value <- min(radiation_p_values)
max_Radiation_p_value <- max(radiation_p_values)

# Minimum and maximum coefficients for Radiation
min_Radiation_coefficient <- min(radiation_estimates)
max_Radiation_coefficient <- max(radiation_estimates)
#Description: Extracts the minimum and maximum values for p-values and coefficients of the predictors ‘Age’ and ‘Radiation*’ from the models.


#12. Print Additional Notes
# Print the results sentence with the min-max ranges of coefficients and
# p-values. paste0() is used because the text fragments already carry their
# own spacing; paste() with its default separator inserted an extra space
# around every number. The second range is now opened with "(" so that it
# matches the closing ")" at the end of the sentence.
# NOTE(review): the wording "significantly associated" is fixed text and is
# printed regardless of the actual p-values - confirm against the output.
print(paste0(
  "However, the patient’s age was significantly associated with the risk of irPneumonitis (β = ",
  round(min_Age_coefficient, 3), " - ", round(max_Age_coefficient, 3),
  "; p = ", round(min_Age_p_value, 3), " - ", round(max_Age_p_value, 3),
  "). Additionally, thorax radiation during ICI therapy was significantly associated with the risk of irPneumonitis when using SUVMEAN, SULMEAN or SUV95 as independent variables (β = ",
  round(min_Radiation_coefficient, 3), " - ", round(max_Radiation_coefficient, 3),
  "; p = ", round(min_Radiation_p_value, 3), " - ", round(max_Radiation_p_value, 3),
  ")."
))
#Description: Reports the minimum and maximum values for p-values and coefficients of the predictors ‘Age’ and ‘Radiation*’ from the models.

